Skip to content

Custom Middleware

Build your own middleware from scratch.

Quick Start

1. Create Middleware Class

csharp
/// <summary>
/// Minimal middleware: writes one console line at the start of each iteration.
/// </summary>
public class SimpleLoggingMiddleware : IAgentMiddleware
{
    /// <summary>
    /// Prints the iteration number and the current message count, then completes synchronously.
    /// </summary>
    public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
    {
        var line = $"Iteration {context.Iteration} starting with {context.Messages.Count} messages";
        Console.WriteLine(line);

        return Task.CompletedTask;
    }
}

2. Register

csharp
// Create the builder, attach the middleware instance, then build the agent.
var builder = new AgentBuilder();
var agent = builder
    .WithMiddleware(new SimpleLoggingMiddleware())
    .Build();

That's it! Implement only the hooks you need - all others have default no-op implementations.


Choosing Hooks

Match your use case to the right hook:

| Goal | Hook | Example |
|---|---|---|
| Inject context once | BeforeMessageTurnAsync | RAG, memory retrieval |
| Modify prompts per iteration | BeforeIterationAsync | Dynamic instructions |
| Retry LLM calls | WrapModelCallAsync | Exponential backoff |
| Count tokens | WrapModelCallStreamingAsync | Progressive counting |
| Validate tools | BeforeToolExecutionAsync | Circuit breaker |
| Check permissions | BeforeFunctionAsync | Human approval |
| Retry functions | WrapFunctionCallAsync | Network retry |
| Log results | AfterIterationAsync | Telemetry |
| Extract memory | AfterMessageTurnAsync | Conversation summary |
| Handle errors | OnErrorAsync | Centralized logging |

See 05.1 Middleware Lifecycle for complete hook reference.


Pattern 1: Simple Hook

No state, no events - just modify context.

csharp
/// <summary>
/// Stateless hook that adjusts the prompt and sampling options on later iterations.
/// </summary>
public class DynamicInstructionsMiddleware : IAgentMiddleware
{
    /// <summary>
    /// From the second iteration onward (Iteration &gt; 0), prepends a system message asking for a
    /// different approach; from the third onward (Iteration &gt; 1), also sets the temperature to 0.3.
    /// </summary>
    public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
    {
        // First iteration: leave the context untouched.
        if (context.Iteration <= 0)
        {
            return Task.CompletedTask;
        }

        var retryHint = new ChatMessage(
            ChatRole.System,
            "Previous attempt failed. Try a different approach.");
        context.Messages.Insert(0, retryHint);

        // Lower the temperature once more than one iteration has already run.
        if (context.Iteration > 1)
        {
            context.Options.Temperature = 0.3f;
        }

        return Task.CompletedTask;
    }
}

Pattern 2: Wrap Hook (Retry)

Wrap LLM or function calls with custom logic.

csharp
/// <summary>
/// Wraps the model call and retries it on <see cref="HttpRequestException"/> with
/// exponential backoff (1s, 2s, 4s, ...).
/// </summary>
public class RetryMiddleware : IAgentMiddleware
{
    private readonly int _maxRetries;

    /// <param name="maxRetries">
    /// Total number of attempts, including the first call. Values below 1 result in a
    /// single attempt with no retry.
    /// </param>
    public RetryMiddleware(int maxRetries = 3)
    {
        _maxRetries = maxRetries;
    }

    /// <summary>
    /// Invokes <paramref name="handler"/> until it succeeds or the attempt budget is used up.
    /// </summary>
    /// <param name="request">Request passed unchanged to every attempt.</param>
    /// <param name="handler">The next step in the pipeline (the actual model call).</param>
    /// <param name="ct">Cancels the backoff delay between attempts.</param>
    /// <returns>The response of the first successful attempt.</returns>
    /// <exception cref="HttpRequestException">Thrown by the last attempt; it is not caught.</exception>
    public async Task<ModelResponse> WrapModelCallAsync(
        ModelRequest request,
        Func<ModelRequest, Task<ModelResponse>> handler,
        CancellationToken ct)
    {
        // Always call the handler at least once, even for a non-positive budget.
        var totalAttempts = Math.Max(1, _maxRetries);

        // The loop exits only by returning a response or by letting an exception escape.
        for (var attempt = 0; ; attempt++)
        {
            try
            {
                return await handler(request);
            }
            // The filter is false on the last attempt, so that failure propagates to the caller.
            catch (HttpRequestException) when (attempt < totalAttempts - 1)
            {
                var delay = TimeSpan.FromSeconds(Math.Pow(2, attempt));
                await Task.Delay(delay, ct);
            }
        }
    }
}

Pattern 3: State Management

Track data across iterations using typed state.

Step 1: Define state:

csharp
/// <summary>
/// Typed middleware state holding a token total.
/// </summary>
[MiddlewareState]
public sealed record TokenCountState
{
    /// <summary>Accumulated token count; 0 for a newly created instance.</summary>
    public int TotalTokens { get; init; }
}

Step 2: Use in middleware:

csharp
/// <summary>
/// Passes streamed model updates through unchanged while estimating the token count of their text.
/// </summary>
public class TokenCounterMiddleware : IAgentMiddleware
{
    /// <summary>
    /// Forwards every update from <paramref name="handler"/> and tallies estimated tokens as they arrive.
    /// </summary>
    public async IAsyncEnumerable<ChatResponseUpdate> WrapModelCallStreamingAsync(
        ModelRequest request,
        Func<ModelRequest, IAsyncEnumerable<ChatResponseUpdate>> handler,
        [EnumeratorCancellation] CancellationToken ct)
    {
        var state = request.State.MiddlewareState.TokenCount ?? new();
        var tokens = 0;

        await foreach (var update in handler(request).WithCancellation(ct))
        {
            tokens += CountTextTokens(update);
            yield return update;
        }

        // Persisting the tally needs a context object, which this hook does not receive.
        // With a context available, the update would look like this:
        // context.UpdateState(s => s with
        // {
        //     MiddlewareState = s.MiddlewareState.WithTokenCount(state with
        //     {
        //         TotalTokens = state.TotalTokens + tokens
        //     })
        // });
    }

    // Sums the estimates for all text parts of one update; non-text content is ignored.
    private int CountTextTokens(ChatResponseUpdate update)
    {
        if (update.Contents == null)
        {
            return 0;
        }

        var total = 0;
        foreach (var item in update.Contents)
        {
            if (item is TextContent text)
            {
                total += EstimateTokens(text.Text);
            }
        }

        return total;
    }

    // Rough heuristic: one token per four characters.
    private int EstimateTokens(string text)
    {
        return text.Length / 4;
    }
}

Note: WrapModelCallStreamingAsync receives ModelRequest (not context). For state updates in Wrap hooks, emit an event or use BeforeIterationAsync/AfterIterationAsync instead.

See 05.2 Middleware State for full state guide.


Pattern 3.5: Persistent State (Cross-Run Caching)

Use Persistent = true for state that should survive across agent runs.

Step 1: Define persistent state:

csharp
/// <summary>
/// Middleware state declared with <c>Persistent = true</c>: user settings plus the time they last changed.
/// </summary>
[MiddlewareState(Persistent = true)]
public sealed record UserPreferencesState
{
    /// <summary>Setting name/value pairs; starts as an empty dictionary.</summary>
    public Dictionary<string, string> Settings { get; init; } = new();
    /// <summary>Time of the most recent update, or <c>null</c> if none has been recorded.</summary>
    public DateTime? LastUpdated { get; init; }
}

Step 2: Use in middleware:

csharp
/// <summary>
/// Applies a stored language preference at the start of a turn and records a newly
/// detected one at the end of it.
/// </summary>
public class UserPreferencesMiddleware : IAgentMiddleware
{
    /// <summary>
    /// If a "language" setting is stored, prepends a system message stating the preference.
    /// </summary>
    public Task BeforeMessageTurnAsync(BeforeMessageTurnContext context, CancellationToken ct)
    {
        // State may be absent, in which case no preference is applied.
        var stored = context.GetMiddlewareState<UserPreferencesState>();
        var lang = stored?.Settings.GetValueOrDefault("language");

        if (lang == null)
        {
            return Task.CompletedTask;
        }

        var hint = new ChatMessage(
            ChatRole.System,
            $"User prefers responses in {lang}");
        context.Messages.Insert(0, hint);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Stores the language detected in the final response, if any, and stamps the update time.
    /// </summary>
    public Task AfterMessageTurnAsync(AfterMessageTurnContext context, CancellationToken ct)
    {
        if (!DetectLanguagePreference(context.FinalResponse, out var newLang))
        {
            return Task.CompletedTask;
        }

        context.UpdateMiddlewareState<UserPreferencesState>(prefs =>
        {
            // Copy the dictionary so the previous state instance is never mutated.
            var settings = new Dictionary<string, string>(prefs.Settings);
            settings["language"] = newLang;

            return prefs with
            {
                Settings = settings,
                LastUpdated = DateTime.UtcNow
            };
        });

        return Task.CompletedTask;
    }

    // Placeholder: always reports "nothing detected"; replace with real detection logic.
    private bool DetectLanguagePreference(ChatMessage message, out string language)
    {
        language = "English";
        return false;
    }
}

When to use Persistent = true:

  • Expensive caches - Summarization results, embeddings
  • User preferences - Settings, permissions
  • Long-term metrics - Total API calls, usage stats

When to use transient (default):

  • Safety state - Error counts, circuit breakers
  • Per-run tracking - Current iteration, temp data

Example: Expensive Cache

csharp
/// <summary>
/// Middleware state declared with <c>Persistent = true</c> that caches a conversation summary.
/// </summary>
[MiddlewareState(Persistent = true)]
public sealed record ConversationSummaryState
{
    /// <summary>The cached summary text, or <c>null</c> if none has been generated.</summary>
    public string? Summary { get; init; }
    /// <summary>Message count recorded when <see cref="Summary"/> was produced.</summary>
    public int MessagesSummarized { get; init; }
}

/// <summary>
/// Reuses a cached conversation summary to shorten long histories, and regenerates the
/// summary once enough new messages have accumulated.
/// </summary>
public class SummaryMiddleware : IAgentMiddleware
{
    // History length above which the cached summary replaces old messages.
    private const int TrimThreshold = 100;

    // Number of old messages dropped once the summary has been inserted.
    private const int TrimCount = 50;

    // Number of new messages required before a fresh summary is generated.
    private const int ResummarizeInterval = 50;

    /// <summary>
    /// When a summary exists and the history exceeds <see cref="TrimThreshold"/> messages,
    /// prepends the summary as a system message and removes the <see cref="TrimCount"/>
    /// messages that follow it.
    /// </summary>
    public Task BeforeMessageTurnAsync(BeforeMessageTurnContext context, CancellationToken ct)
    {
        var summary = context.GetMiddlewareState<ConversationSummaryState>()?.Summary;

        if (summary != null && context.Messages.Count > TrimThreshold)
        {
            // Use cached summary instead of re-processing all messages
            context.Messages.Insert(0, new ChatMessage(
                ChatRole.System,
                $"Previous conversation summary: {summary}"));

            // Trim old messages (index 0 is the summary just inserted)
            context.Messages.RemoveRange(1, TrimCount);
        }

        // Nothing is awaited here, so return a completed task instead of marking the method async.
        return Task.CompletedTask;
    }

    /// <summary>
    /// Generates and stores a new summary once the history has grown by more than
    /// <see cref="ResummarizeInterval"/> messages since the last one.
    /// </summary>
    public async Task AfterMessageTurnAsync(AfterMessageTurnContext context, CancellationToken ct)
    {
        var messagesSummarized = context.GetMiddlewareState<ConversationSummaryState>()?
            .MessagesSummarized ?? 0;

        if (context.Messages.Count > messagesSummarized + ResummarizeInterval)
        {
            // Generate new summary (expensive LLM call)
            var newSummary = await GenerateSummary(context.Messages, ct);

            context.UpdateMiddlewareState<ConversationSummaryState>(_ => new()
            {
                Summary = newSummary,
                MessagesSummarized = context.Messages.Count
            });
            // Summary persists to AgentSession automatically.
            // Next run reuses it instead of expensive re-summarization!
        }
    }
}

See 05.2 Middleware State for full persistence guide.


Pattern 4: Request/Response Events

Interactive middleware with user approval.

Step 1: Define events:

csharp
/// <summary>
/// Event emitted to ask whether a function call may proceed.
/// </summary>
public class PermissionRequestEvent : AgentEvent
{
    /// <summary>Name of the function awaiting approval.</summary>
    public required string FunctionName { get; init; }
    /// <summary>Correlation id; the matching response event carries the same value.</summary>
    public required string RequestId { get; init; }
}

/// <summary>
/// Event carrying the answer to a permission request.
/// </summary>
public class PermissionResponseEvent : AgentEvent
{
    /// <summary>Correlation id copied from the request being answered.</summary>
    public required string RequestId { get; init; }
    /// <summary><c>true</c> to allow the function call, <c>false</c> to deny it.</summary>
    public required bool Approved { get; init; }
}

Step 2: Implement middleware:

csharp
/// <summary>
/// Asks for approval before every function call and blocks the call when approval is
/// denied or does not arrive in time.
/// </summary>
public class PermissionMiddleware : IAgentMiddleware
{
    // How long to wait for an answer before denying by default.
    private static readonly TimeSpan ResponseTimeout = TimeSpan.FromSeconds(30);

    /// <summary>
    /// Emits a <see cref="PermissionRequestEvent"/> and waits for the matching
    /// <see cref="PermissionResponseEvent"/>.
    /// </summary>
    /// <param name="context">Context of the function about to be executed.</param>
    /// <param name="ct">Caller's cancellation token.</param>
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="ct"/> is cancelled while waiting. A timeout does not
    /// throw; it blocks the function instead.
    /// </exception>
    public async Task BeforeFunctionAsync(BeforeFunctionContext context, CancellationToken ct)
    {
        var requestId = Guid.NewGuid().ToString();

        // Emit request
        context.Emit(new PermissionRequestEvent
        {
            FunctionName = context.Function.Name,
            RequestId = requestId
        });

        // Wait for response; the linked source fires on caller cancellation OR on timeout.
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(ResponseTimeout);

        try
        {
            var response = await context.WaitForResponseAsync<PermissionResponseEvent>(
                requestId,
                cts.Token
            );

            if (!response.Approved)
            {
                context.BlockExecution = true;
                context.OverrideResult = "Permission denied";
            }
        }
        // Only treat the cancellation as a timeout when the caller did not cancel.
        // Caller cancellation must propagate rather than be reported as a denied permission.
        catch (OperationCanceledException) when (!ct.IsCancellationRequested)
        {
            // Timeout - deny by default
            context.BlockExecution = true;
            context.OverrideResult = "Permission request timed out";
        }
    }
}

Step 3: Handle in UI:

csharp
await foreach (var evt in agent.RunAsync("Do something", ct))
{
    // This loop only reacts to permission requests; every other event is skipped.
    if (evt is not PermissionRequestEvent request)
    {
        continue;
    }

    var approved = await ShowPermissionDialog(request.FunctionName);

    // Reply with the same RequestId so the waiting middleware can match the answer.
    var reply = new PermissionResponseEvent
    {
        RequestId = request.RequestId,
        Approved = approved
    };
    await agent.EmitEventAsync(reply);
}

See 05.3 Middleware Events for full event guide.


Pattern 5: Multi-Hook Middleware

Use multiple hooks for complex logic.

csharp
/// <summary>
/// Logs the agent lifecycle: turn start and end, each iteration, each function call and
/// its outcome, and errors.
/// </summary>
public class ComprehensiveLoggingMiddleware : IAgentMiddleware
{
    private readonly ILogger _logger;

    /// <param name="logger">Logger that receives all entries written by this middleware.</param>
    public ComprehensiveLoggingMiddleware(ILogger logger)
    {
        _logger = logger;
    }

    /// <summary>Logs the user message that starts the turn.</summary>
    public Task BeforeMessageTurnAsync(BeforeMessageTurnContext context, CancellationToken ct)
    {
        _logger.LogInformation("Turn started: {UserMessage}", context.UserMessage);
        return Task.CompletedTask;
    }

    /// <summary>Logs the iteration number and the current message count.</summary>
    public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
    {
        _logger.LogInformation("Iteration {Iteration}: {MessageCount} messages",
            context.Iteration, context.Messages.Count);
        return Task.CompletedTask;
    }

    /// <summary>Logs the name of the function about to be called.</summary>
    public Task BeforeFunctionAsync(BeforeFunctionContext context, CancellationToken ct)
    {
        _logger.LogInformation("Calling function: {FunctionName}", context.Function.Name);
        return Task.CompletedTask;
    }

    /// <summary>Logs whether the function call failed or succeeded.</summary>
    public Task AfterFunctionAsync(AfterFunctionContext context, CancellationToken ct)
    {
        if (context.Exception != null)
        {
            // Pass the exception object so stack trace and inner exceptions are kept;
            // the rendered message text stays the same as before.
            _logger.LogError(context.Exception, "Function {Name} failed: {Error}",
                context.Function.Name, context.Exception.Message);
        }
        else
        {
            _logger.LogInformation("Function {Name} succeeded", context.Function.Name);
        }

        return Task.CompletedTask;
    }

    /// <summary>Logs the text of the final response of the turn.</summary>
    public Task AfterMessageTurnAsync(AfterMessageTurnContext context, CancellationToken ct)
    {
        _logger.LogInformation("Turn completed: {Response}", context.FinalResponse.Text);
        return Task.CompletedTask;
    }

    /// <summary>Logs the source and message of an error reported by the pipeline.</summary>
    public Task OnErrorAsync(ErrorContext context, CancellationToken ct)
    {
        // NOTE(review): if ErrorContext.Error is an Exception, pass it as the first
        // argument here as well so the stack trace is logged - confirm its type.
        _logger.LogError("Error in {Source}: {Error}",
            context.Source, context.Error.Message);
        return Task.CompletedTask;
    }
}

Complete Example: Rate Limiter

Full-featured middleware with state, events, and multiple hooks.

State:

csharp
/// <summary>
/// Middleware state for the rate limiter: the current window and the calls counted in it.
/// </summary>
[MiddlewareState]
public sealed record RateLimitState
{
    /// <summary>UTC start of the current window, or <c>null</c> if no window has been started.</summary>
    public DateTime? WindowStart { get; init; }
    /// <summary>Number of calls counted in the current window.</summary>
    public int CallsInWindow { get; init; }
}

Events:

csharp
/// <summary>
/// Event emitted when the call limit for the current window has been reached.
/// </summary>
public class RateLimitExceededEvent : AgentEvent
{
    /// <summary>Calls already counted in the current window.</summary>
    public required int CallsInWindow { get; init; }
    /// <summary>The configured per-window limit.</summary>
    public required int MaxCalls { get; init; }
    /// <summary>Time remaining until the current window ends.</summary>
    public required TimeSpan ResetIn { get; init; }
}

Middleware:

csharp
/// <summary>
/// Limits iterations to a fixed number per one-minute window, pausing when the limit is hit.
/// </summary>
public class RateLimitMiddleware : IAgentMiddleware
{
    /// <summary>Maximum number of iterations allowed per one-minute window.</summary>
    public int MaxCallsPerMinute { get; set; } = 10;

    /// <summary>
    /// Counts this iteration against the current window. When the window is full, emits a
    /// <see cref="RateLimitExceededEvent"/>, waits for the window to end, and starts a new one.
    /// </summary>
    public async Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
    {
        var snapshot = context.GetMiddlewareState<RateLimitState>();
        var startedAt = snapshot?.WindowStart;
        var used = snapshot?.CallsInWindow ?? 0;

        var now = DateTime.UtcNow;

        // Case 1: no window yet, or the previous one has expired - open a new window.
        var windowIsLive = startedAt.HasValue &&
            (now - startedAt.Value) < TimeSpan.FromMinutes(1);
        if (!windowIsLive)
        {
            context.UpdateMiddlewareState<RateLimitState>(_ => new RateLimitState
            {
                WindowStart = now,
                CallsInWindow = 1
            });
            return;
        }

        // Case 2: window is live and has capacity left - count this call.
        if (used < MaxCallsPerMinute)
        {
            context.UpdateMiddlewareState<RateLimitState>(s => s with
            {
                CallsInWindow = s.CallsInWindow + 1
            });
            return;
        }

        // Case 3: window is full - announce it, wait for it to end, then open a new window.
        var resetIn = TimeSpan.FromMinutes(1) - (now - startedAt.Value);

        context.Emit(new RateLimitExceededEvent
        {
            CallsInWindow = used,
            MaxCalls = MaxCallsPerMinute,
            ResetIn = resetIn
        });

        await Task.Delay(resetIn, ct);

        context.UpdateMiddlewareState<RateLimitState>(_ => new RateLimitState
        {
            WindowStart = DateTime.UtcNow,
            CallsInWindow = 1
        });
    }
}

Best Practices

1. Use Typed Contexts

csharp
// GOOD: Typed context
public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
    context.Messages.Add(systemMessage); // Compile-time safe
    return Task.CompletedTask; // A non-async Task method must return a task
}

// BAD: No type safety without typed contexts

2. Handle Null State

csharp
// GOOD: Extension methods auto-instantiate.
// The read uses ?. and ?? to supply a default when no state is available;
// the updater lambda uses `s` directly.
var count = context.GetMiddlewareState<MyState>()?.Count ?? 0;
context.UpdateMiddlewareState<MyState>(s => s with { Count = s.Count + 1 });

// ALSO GOOD: Advanced - use when updating core state too.
// Here the null case is handled by hand with `?? new()`.
context.UpdateState(s => {
    var state = s.MiddlewareState.MyState ?? new();
    return s with { /* ... */ };
});

// BAD: Would be NullReferenceException (but won't compile - context.State is internal)
// var count = context.State.MiddlewareState.MyState.Count;

3. Use Immutable Updates

csharp
// GOOD: Extension methods with 'with' expression - produces a new record, leaving the old one untouched
context.UpdateMiddlewareState<MyState>(s => s with { Count = s.Count + 1 });

// ALSO GOOD: Advanced approach for complex updates
context.UpdateState(s =>
{
    // Fall back to a fresh instance when no state is stored yet.
    var state = s.MiddlewareState.MyState ?? new();
    return s with
    {
        MiddlewareState = s.MiddlewareState.WithMyState(state with { Count = state.Count + 1 })
    };
});

// BAD: Would be mutation
// state.Count++; // Compile error - init-only property

4. Set Timeouts on WaitForResponseAsync

csharp
// GOOD: Timeout - link to the caller's token, then cancel the linked source after 30 seconds
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
cts.CancelAfter(TimeSpan.FromSeconds(30));
var response = await context.WaitForResponseAsync<MyEvent>(requestId, cts.Token);

// BAD: No timeout - could hang forever
// (An alternative to the snippet above, not a continuation: it declares `response` again.)
var response = await context.WaitForResponseAsync<MyEvent>(requestId, ct);

5. Don't Use Instance Fields for State

csharp
// BAD: Race condition with parallel RunAsync calls
private int _count = 0;

public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
    _count++; // Not thread-safe! The field is shared by every run that uses this instance.
    return Task.CompletedTask;
}

// GOOD: Use middleware state extensions - the counter lives in the middleware state, not in the instance
public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
    context.UpdateMiddlewareState<MyCounter>(s => s with { Count = s.Count + 1 });
    return Task.CompletedTask;
}

6. Only Implement Hooks You Need

csharp
// GOOD: Only implement required hooks
public class SimpleMiddleware : IAgentMiddleware
{
    public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
    {
        // Only hook we need
        return Task.CompletedTask;
    }
    // All other hooks use default no-op implementation
}

// BAD: Implementing unused hooks
// (Parameter lists are elided with "..." - illustrative pseudo-code, not compilable C#.)
public class VerboseMiddleware : IAgentMiddleware
{
    public Task BeforeMessageTurnAsync(...) => Task.CompletedTask; // Unused
    public Task AfterMessageTurnAsync(...) => Task.CompletedTask; // Unused
    public Task BeforeIterationAsync(...) { /* Actual logic */ }
    // etc...
}

Testing Middleware

csharp
[Fact]
public async Task TestRetryMiddleware()
{
    // Arrange: a handler that fails twice with a transient error, then succeeds.
    var attempts = 0;
    var sut = new RetryMiddleware(maxRetries: 3);
    var request = new ModelRequest { /* ... */ };

    Task<ModelResponse> FlakyHandler(ModelRequest _)
    {
        attempts++;

        return attempts < 3
            ? Task.FromException<ModelResponse>(new HttpRequestException("Simulated error"))
            : Task.FromResult(new ModelResponse { /* success */ });
    }

    // Act
    var result = await sut.WrapModelCallAsync(request, FlakyHandler, CancellationToken.None);

    // Assert: two failures plus the successful third call.
    Assert.Equal(3, attempts);
    Assert.NotNull(result);
}

Next Steps

Released under the MIT License.